# Figure b1_1
rm(list=ls())

(WD <- getwd())
if (!is.null(WD)) setwd(WD)

indata0      = paste0(WD,"/_individual_data/_spfrawdata/","", collapse = NULL)
indata1      = paste0(WD,"/_other_data/_public_signals/_hard_variables/","", collapse = NULL)
indata2      = paste0(WD,"/_other_data/_public_signals/_survey_variables/","", collapse = NULL)
indata3      = paste0(WD,"/_individual_data/_otherrawdata/","", collapse = NULL)

library(haven)
library(AER)
library(sandwich)
library(lmtest)
library(pracma)
library(stargazer)
library(plm)
library(pracma)
library(DataCombine)
library(jtools)
library(plyr)
library(ggplot2)
library(tidyverse)
library(dotwhisker)

lagpad <- function(x, k) {
  res <- c(rep(NA, k), x)[1:length(x)]
  return(res)
}

n_hard    = 9
n_survey  = 4
cut       = 2000 #150

load(paste(indata3,"us_liv_cpi_ind.Rda",sep=""))
data$ID         = data$id
data            = data[data$qdate<=2020.00,]
data            = ddply(data,.(qdate),transform, cons = mean(f2,na.rm = TRUE))
data$lag        = data$flpi
data_fcast      = subset(data, select = c(qdate,fc_err, ID, p_realz, lag))

load(paste(indata1,"public_signals_hard.Rda",sep=""))
data_hard       = data
data_hard$qdate = data_hard$qdate+5/4 
data_hard       = merge(data_hard, data_fcast, by = "qdate", all = TRUE)
#data_hard       = merge(data_hard, data_tmp, by= "qdate", all = TRUE)

load(paste(indata2,"public_signals_survey.Rda",sep=""))
data_survey       = data
data_survey$qdate = data_survey$qdate+5/4
data_survey       = merge(data_survey, data_fcast, by = "qdate", all = TRUE)

iter          = 0
out_beta_hard  = rep(NA, n_hard)
out_se_hard    = rep(NA, n_hard)
out_n_hard     = rep(NA, n_hard)
out_t_hard     = rep(NA, n_hard)

for(ii in colnames(data_hard)) {
  if (ii != "qdate"  && ii != "fc_err" && ii != "ID" && ii != "p_realz") { 
    iter = iter + 1
    print(ii)
    
    data_hard$xtmp   =  scale(data_hard[[ii]])
    if  (iter == 1 || iter == 5  || iter == 6  || iter == 8)  {
      data_hard$xtmp   =  -1 * scale(data_hard[[ii]])
    }
    
    plm_hard     = plm(fc_err ~ xtmp, data=data_hard,index=c("ID", "qdate"), model="within")
    beta         = coefficients(plm_hard)
    
    data_tmp     = aggregate(x= data_hard, by =list(data_hard$qdate), FUN= "mean")
    data_tmp     = data_tmp[data_tmp$qdate<=2020.00,]
    tobs         = length(na.omit(data_tmp$xtmp))
    
    if (tobs > cut){
      std_error    = sqrt(diag(vcovDC(plm_hard, type = "HC1")))
    } else {
      std_error    = sqrt(diag(vcovHC(plm_hard, cluster = "group", type = "HC1")))  
    }
    
    out_beta_hard[iter]   = as.numeric(beta[1])
    out_se_hard[iter]     = as.numeric(std_error[1])
    out_n_hard[iter]      = nobs(plm_hard) 
    out_t_hard[iter]      = tobs
  }
}

all_hard =  data.frame(out_beta_hard, out_se_hard, out_n_hard)

clean_hard = all_hard 
term = c('NEER','FIN','IMPP','OIL','STOX','TERM','TIPS','UNMP', 'LAG')
clean_hard$term = term
names(clean_hard)[1] <- "estimate"
names(clean_hard)[2] <- "std.error"
clean_hard = clean_hard[c(9,7,1,3,4,8,2,5,6),]

#pdf(file="_figures/figure_3_2.pdf")
#dwplot(clean_hard, dot_args = list(size = 2.5), whisker_args = list(size = 1.0))+
#  theme_bw() + xlab("Standardized Coefficient") + ylab("") +
#  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
#  ggtitle("") + xlim(-0.50, 0.50) +
#  theme(plot.title = element_text(face="bold"), legend.position="none", text = element_text(size = 14, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
#dev.off()

#pdf(file="_figures/figure_3_2.pdf")
dwplot(clean_hard, dot_args = list(size = 2.5, color ="black"), whisker_args = list(size = 1.0, color = "black"))+
  theme_bw() + xlab("Standardized Coefficient") + ylab("") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  ggtitle("") + xlim(-0.50, 0.50) +
  theme(plot.title = element_text(face="bold"), legend.position="none", text = element_text(size = 14, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
ggsave("_figures/figure_b1_2.pdf")
#dev.off()

#write.table(format(clean_hard,digits=3), file = "_tables/table_figure_3d.txt", sep = "\t")

iter          = 0
out_beta_survey  = rep(NA, n_survey)
out_se_survey    = rep(NA, n_survey)
out_n_survey     = rep(NA, n_survey)
out_t_survey     = rep(NA, n_survey)

for(ii in colnames(data_survey)) {
  if (ii != "qdate"  && ii != "fc_err" && ii != "ID" && ii != "p_realz" && ii != "lag" && ii != "liv") { 
    iter = iter + 1
    print(ii)
    
    if  (iter != 2) {
      data_survey$xtmp   =  scale(data_survey[[ii]])
    } else if (iter ==2 ) {
      data_survey$xtmp = - scale(data_survey[[ii]])  
    }
    
    plm_survey    = plm(fc_err ~ xtmp, data=data_survey,index=c("ID", "qdate"), model="within")
    beta          = coefficients(plm_survey)
    
    data_tmp     = aggregate(x= data_hard, by =list(data_hard$qdate), FUN= "mean")
    data_tmp     = data_tmp[data_tmp$qdate<=2020.00,]
    tobs         = length(na.omit(data_tmp$xtmp))
    
    if (tobs > cut){
      std_error    = sqrt(diag(vcovDC(plm_survey, type = "HC1")))
    } else {
      std_error    = sqrt(diag(vcovHC(plm_survey, cluster = "group", type = "HC1")))  
    }
    
    out_beta_survey[iter]   = as.numeric(beta[1])
    out_se_survey[iter]     = as.numeric(std_error[1])
    out_n_survey[iter]      = nobs(plm_survey)
    out_t_survey[iter]      = tobs
  }
}

iter            = iter +1
load(paste(indata3,"us_liv_cpi_ind.Rda",sep=""))
data            = data[data$qdate<=2020.00,]
data            = ddply(data,.(qdate),transform, cons = mean(f2,na.rm = TRUE))
plm_all_cons    = plm(fc_err ~ cons, data=data,index=c("id", "qdate"), model="within")
beta            = coefficients(plm_all_cons)
std_error       = sqrt(diag(vcovHC(plm_all_cons,cluster = "group", type = "HC1")))
out_beta_survey[iter]   = as.numeric(beta[1])
out_se_survey[iter]     = as.numeric(std_error[1])
out_n_survey[iter]      = nobs(plm_survey) 

all_survey  =  data.frame(out_beta_survey, out_se_survey, out_n_survey)

clean_survey = all_survey 
term = c('MICH','SCE','SPF', 'LIV')
clean_survey$term = term
names(clean_survey)[1] <- "estimate"
names(clean_survey)[2] <- "std.error"
clean_survey = clean_survey[c(3,1,2,4),]

#pdf(file="_figures/figure_3_1.pdf")
#dwplot(clean_survey, dot_args = list(size = 2.5), whisker_args = list(size = 1.0))+
#  theme_bw() + xlab("Standardized Coefficient") + ylab("") +
#  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
#  ggtitle("") + xlim(-0.75, 0.50) +
#  theme(plot.title = element_text(face="bold"), legend.position="none", text = element_text(size = 14, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
#dev.off()

#pdf(file="_figures/figure_3_1.pdf")
dwplot(clean_survey, dot_args = list(size = 2.5, color ="black"), whisker_args = list(size = 1.0, color = "black"))+
  theme_bw() + xlab("Standardized Coefficient") + ylab("") +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  ggtitle("") + xlim(-0.75, 0.50) +
  theme(plot.title = element_text(face="bold"), legend.position="none", text = element_text(size = 14, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
ggsave("_figures/figure_b1_1.pdf")
#dev.off()

#write.table(format(clean_survey,digits=3), file = "_tables/table_figure_3c.txt", sep = "\t")
